# -*- coding: utf-8 -*-
"""

Merging vessel characterisics with voyage spatial data for plotting
Saving one shapefile with all voyages

@author: rklotz
"""

import pandas as pd
import numpy as np
import geopandas as gpd
from datetime import datetime
import os

import final_track_functions


###############################################################################

# Inputs 
data_folder = r"H:\My Drive\Boats\ReplicationCode\data"

# track files (using these to get OID - IMO map; and for some route statistics)
tracks_csv = os.path.join(data_folder,'AIS','voyages_v2a',"voyages_%s.csv")
             
# vessel chars
vessel_char_csv =  os.path.join(data_folder,"wfr_wHist_09182020.csv") 
#vessel_char_csv = r"H:\My Drive\Boats\Stata\CA_tracks\data\wfr_wHist.csv"

# mmsi imo map
mmsi_imo_dta = os.path.join(data_folder,'AIS','voyages_v2a',"mmsi_imo_my.dta")
#mmsi_imo_dta = r"H:\My Drive\Boats\Stata\results\CAtracks_wFuel\mmsi_imo_my.dta"


#tracks_gdb = r"C:\Users\rklotz\Documents\data\lines_v2\processed_lines_wFuel.gdb"
tracks_gdb = os.path.join(data_folder,'AIS','voyages_dense_v2a.gdb')                    
layer_tag = 'wc_dense_%g'
tracks_where = "" # "PORT1=7 and PORT2=16" # ""
years = [2009, 2010 ,2011,2012,2013,2014,2015,2016] 

# track data - has route definitions etc
final_tracks_csv = os.path.join(data_folder,'final',"CA_tracks_v2a.csv")
track_vars = ['OID','AISyear','route_name','dirNW','port_name1_ECregion','port_name2_ECregion',
              'port_name1_ECcountry','port_name2_ECcountry','port_name1_ECport','port_name2_ECport','km_onland',
              'elsegundo','rosarito','ensenada','PORT_ID']


# Output
tracks_out = os.path.join(data_folder,'final','voyages_shp',"voyages.shp") 

##############################################################################
# make folder for output
if not os.path.exists(os.path.split(tracks_out)[0]):
    os.makedirs(os.path.split(tracks_out)[0])   

# get track to IMO mapping
print('Getting track to IMO map')
df_oid_imo , df_oid_mmsi_date = final_track_functions.get_imo_by_track(tracks_csv,mmsi_imo_dta,years) 

# load track information (for route data)
df_final_tracks = pd.read_csv( final_tracks_csv , index_col=False  , usecols = track_vars  ) 

### creating an inter_port flag
# consistent with what is done in stata
# tracks that stop at ensenada, el segundo, rosarito
df_final_tracks.PORT_ID=df_final_tracks.PORT_ID.str.replace("130", "9999") # preventing clash between 13 and 130
df_final_tracks['inter_port'] = 1 * ( (df_final_tracks.elsegundo==1) | (df_final_tracks.rosarito==1) | (df_final_tracks.ensenada==1) | 
                                   df_final_tracks.PORT_ID.str.contains("57") | df_final_tracks.PORT_ID.str.contains("58") | df_final_tracks.PORT_ID.str.contains("13")  )
df_final_tracks.PORT_ID=df_final_tracks.PORT_ID.str.replace( "9999", "130" ) # preventing clash between 13 and 130

### adding grouped ports
for i in [1,2] :
    df_final_tracks["port_name"+str(i)+"_ec"] = df_final_tracks["port_name"+str(i)+"_ECport"] 
    df_final_tracks["port_name"+str(i)+"_ec"] = np.where( 
                                ( df_final_tracks["port_name"+str(i)+"_ECregion"].isin(["CAm","SAm","US_EC","Af","Eur"] )
                                 | ( df_final_tracks["port_name"+str(i)+"_ECregion"].isin(["NAm"])  & ~df_final_tracks["port_name"+str(i)+"_ECcountry"].isin(["Canada, WC","US_WC"]) )     
                                 )  , "South" , df_final_tracks["port_name"+str(i)+"_ec"] )
    df_final_tracks["port_name"+str(i)+"_ec"] = np.where( 
                                ( df_final_tracks["port_name"+str(i)+"_ECregion"].isin(["MidE","Asia"] ) )  , "SEAsia_MidEast" , df_final_tracks["port_name"+str(i)+"_ec"] )
    df_final_tracks["port_name"+str(i)+"_ec"] = np.where( 
                                ( df_final_tracks["port_name"+str(i)+"_ECcountry"].isin(["China South","China Central","China North","China Taiwan","Hong Kong","Japan","South Korea","Russia, EC"] ) ) 
                                , df_final_tracks["port_name"+str(i)+"_ECcountry"] , df_final_tracks["port_name"+str(i)+"_ec"] )
    df_final_tracks["port_name"+str(i)+"_ec"] = np.where( 
                                ( df_final_tracks["port_name"+str(i)+"_ECcountry"].isin(["Canada, WC","Sea"] ) )  , df_final_tracks["port_name"+str(i)+"_ECcountry"] , df_final_tracks["port_name"+str(i)+"_ec"] )
    df_final_tracks["port_name"+str(i)+"_ec"] = np.where( 
                                ( df_final_tracks["port_name"+str(i)+"_ECregion"].isin(["Oce"] ) )  , df_final_tracks["port_name"+str(i)+"_ECregion"] , df_final_tracks["port_name"+str(i)+"_ec"] )
    df_final_tracks["port_name"+str(i)+"_ec"] = np.where( 
                                ( df_final_tracks["port_name"+str(i)+"_ECregion"].isin(["Hong Kong"] ) )  , "China South" , df_final_tracks["port_name"+str(i)+"_ec"] )
    df_final_tracks["port_name"+str(i)+"_ec"] = np.where( 
                                ( df_final_tracks["port_name"+str(i)+"_ECregion"].isin(["China Taiwan"] ) )  , "China Central" , df_final_tracks["port_name"+str(i)+"_ec"] )

df_final_tracks['minport'] = df_final_tracks[['port_name1_ec','port_name2_ec']].min(axis=1)
df_final_tracks['maxport'] = df_final_tracks[['port_name1_ec','port_name2_ec']].max(axis=1)
df_final_tracks['route_ec'] = df_final_tracks['minport'] + "_" + df_final_tracks['maxport']
df_final_tracks.route_ec = df_final_tracks.route_ec.str.replace(' ','').replace(',','')

df_final_tracks = df_final_tracks.loc[:,['OID','AISyear','dirNW','route_ec','km_onland','route_name','inter_port','port_name1_ECregion','port_name2_ECregion']]

# load vessel chars and add some required variables
df_ves_char = pd.read_csv( vessel_char_csv , index_col=False ,  low_memory=False) 
df_ves_char = df_ves_char.replace(r'\r',' ', regex=True)
df_ves_char = df_ves_char.replace(r'\n',' ', regex=True)
         
# load mmsi imo map
df_mmsi_imo = pd.read_stata( mmsi_imo_dta )
df_mmsi_imo['m'] = df_mmsi_imo['my'].dt.month
df_mmsi_imo['y'] = df_mmsi_imo['my'].dt.year
df_mmsi_imo = df_mmsi_imo.rename(columns={'mmsi':'MMSI'})

# getting just vessel chars for fuel calcs
# and for speed profiles
ves_chars = ['group_agg','group','loa','dwt','built','power_kw','flag','Power Type']
df_ves_char_tracks = df_ves_char[['IMO']+ves_chars]

gdf_list = [] 
for year in years :
    print(year)

    gdf_tracks = gpd.GeoDataFrame.from_file(tracks_gdb, layer=layer_tag % year)
    gdf_tracks['OID'] = gdf_tracks.TARGET_FID  
    gdf_tracks['AISyear'] = year 
    
    # storing original IMO
    gdf_tracks['imo_orig'] = gdf_tracks.IMO   
    
    if year in [2010,2011,2012,2013,2014]:
        # merge in im mmsi map
        gdf_tracks = gdf_tracks.merge(df_oid_imo,on=['OID','AISyear'],how='left')
        gdf_tracks['IMO'] = gdf_tracks.imo_m 
        #df_pnts = df_pnts.drop(columns='imo_m')
        #df_pnts = df_pnts.rename( columns={'imo_m':'IMO'} )
        
    # stripping IMO text
    if year in [2015,2016] :
        gdf_tracks['IMO'] = gdf_tracks["IMO"].str.lstrip("IMO").fillna(-9999).astype(int) # .astype(int) # stripping IMO 

    # merge chars
    gdf_tracks = gdf_tracks.merge(df_ves_char_tracks, on='IMO' , how='left')
        
    gdf_list.append(gdf_tracks)


gdf_all = gpd.GeoDataFrame(pd.concat(gdf_list, ignore_index=True,sort=True), crs=gdf_list[0].crs)

# merge track information
# doing inner so we only keep tracks in final data set
gdf_all = gdf_all.merge(df_final_tracks,on=['OID','AISyear'],how='inner')


gdf_all['date'] = pd.to_datetime(gdf_all['TIME1'],utc=True).dt.floor('D')
gdf_all['t_eca1'] = gdf_all.date.dt.tz_convert(None) - datetime(2009,7,1)
gdf_all['t_eca1'] = gdf_all['t_eca1'].dt.days
gdf_all['t_eca2'] = gdf_all.date.dt.tz_convert(None) - datetime(2011,12,1)
gdf_all['t_eca2'] = gdf_all['t_eca2'].dt.days
gdf_all['t_eca3'] = gdf_all.date.dt.tz_convert(None) - datetime(2012,8,1)
gdf_all['t_eca3'] = gdf_all['t_eca3'].dt.days
gdf_all['t_eca4'] = gdf_all.date.dt.tz_convert(None) - datetime(2014,1,1)
gdf_all['t_eca4'] = gdf_all['t_eca4'].dt.days
gdf_all['t_eca5'] = gdf_all.date.dt.tz_convert(None) - datetime(2015,1,1)
gdf_all['t_eca5'] = gdf_all['t_eca5'].dt.days
gdf_all = gdf_all.drop(labels='date' , axis=1)
# create two indexes, 1 to ECA implementation, 1 to boundary change

# saving
print("Saving")
gdf_all.to_file(tracks_out , encoding='utf-8') 
